\subsection{Definition}
\begin{definition}
	Let \( M \) be a set.
	Then a \textit{metric} on \( M \) is a function \( d \colon M \times M \to \mathbb R \) such that
	\begin{enumerate}
		\item (positivity) \( \forall x,y \in M, d(x,y) \geq 0 \), with \( d(x,y) = 0 \iff x = y \)
		\item (symmetry) \( \forall x,y \in M, d(x,y) = d(y,x) \)
		\item (triangle inequality) \( \forall x,y,z \in M, d(x,z) \leq d(x,y) + d(y,z) \).
	\end{enumerate}
	A metric space is a set \( M \) together with a metric \( d \) on \( M \), written as the pair \( (M, d) \).
\end{definition}
\begin{example}
	Let \( M = \mathbb R, \mathbb C \) and \( d(x,y) = \abs{x-y} \).
	This is known as the `standard metric' on \( M \).
	If a metric is not specified, the standard metric is taken as implied.
\end{example}
\begin{example}
	Let \( M = \mathbb R^n, \mathbb C^n \), and we define the Euclidean norm (or Euclidean length) to be
	\[
		\norm{x} = \norm{x}_2 = \qty(\sum_{k=1}^n \abs{x_k}^2)^{\frac{1}{2}}
	\]
	This satisfies
	\[
		\norm{x+y} \leq \norm{x} + \norm{y}
	\]
	and it then follows that we can define the metric as
	\[
		d_2(x,y) = \norm{x-y}_2
	\]
	called the Euclidean metric.
	We can check that this is indeed a metric easily.
	This is the standard metric on \( \mathbb R^n, \mathbb C^n \).
	The metric space \( (M, d) \) in this case is called \( n \)-dimensional real (or complex) Euclidean space, sometimes denoted \( \ell_2^n \).
	The Euclidean norm is sometimes called the \( \ell_2 \) norm, and the Euclidean metric is the \( \ell_2 \) metric.
\end{example}
\begin{example}
	Let \( M = \mathbb R^n, \mathbb C^n \), and we define the \( \ell_1 \) norm to be
	\[
		\norm{x}_1 = \sum_{k=1}^n \abs{x_k}
	\]
	which defines the \( \ell_1 \) metric given by
	\[
		d_1(x,y) = \norm{x-y}_1
	\]
	\( (M, d_1) \) is denoted \( \ell_1^n \).
	We can generalise and form the metric space \( \ell_p^n \) for all \( p \in [1, \infty] \).
\end{example}
\begin{example}
	Again, let \( M = \mathbb R^n, \mathbb C^n \).
	We can define the \( \ell_\infty \) norm by
	\[
		\norm{x}_\infty = \max_{1 \leq k \leq n} \abs{x_k}
	\]
	This defines the \( \ell_\infty \) metric:
	\[
		d_\infty(x,y) = \norm{x-y}_\infty = \max_{1 \leq k \leq n} \abs{x_k - y_k}
	\]
	We denote \( (M, d_\infty) \) by \( \ell_\infty^n \).
\end{example}
\textit{In this course, we will only work with \( p = 1, 2, \infty \), although the calculations can be made to work for other \( p \).}
\begin{example}
	Let \( S \) be a set.
	Let \( \ell_\infty(S) \) be the set of all bounded scalar functions on \( S \).
	We then define the \( \ell_\infty \) norm of \( f \in \ell_\infty(S) \) by
	\[
		\norm{f} = \norm{f}_\infty = \sup_{x \in S} \abs{f(x)}
	\]
	The supremum exists since the function is always bounded.
	This is also known as the `sup norm' or the `uniform norm'.
	Note that, for \( f,g \in \ell_\infty(S) \), and \( x \in S \),
	\[
		\abs{f(x) + g(x)} \leq \abs{f(x)} + \abs{g(x)} \leq \norm{f} + \norm{g}
	\]
	so taking the supremum over \( x \in S \) gives \( \norm{f+g} \leq \norm{f} + \norm{g} \).
	Hence \( d(f,g) =\norm{f-g} \) defines a metric on \( \ell_\infty(S) \).
	This is the standard metric on this space \( \ell_\infty(S) \), also called the `uniform metric'.
	For example, \( \ell_\infty(\qty{1, \dots, n}) = \mathbb R^n \) with the metric \( \ell_\infty \).
	Also, for \( \ell_\infty(\mathbb N) \), we typically omit the \( \mathbb N \) and instead write \( \ell_\infty \) for the space of scalar sequences with the uniform metric.
\end{example}
\begin{example}
	Consider \( C[a,b] \), the set of all continuous functions on \( [a,b] \).
	For \( p = 1,2 \), we define the \( L_p \) norm of \( f \in C[a,b] \) by
	\[
		\norm{f}_p = \qty( \int_a^b \abs{f(x)}^p \dd{x} )^{\frac{1}{p}}
	\]
	which induces the \( L_p \) metric on \( C[a,b] \).
\end{example}
\begin{example}
	Let \( M \) be a set.
	Then
	\[
		d(x,y) = \begin{cases}
			0 & \text{if } x = y \\
			1 & \text{otherwise}
		\end{cases}
	\]
	is a metric, called the discrete metric on \( M \).
	In particular, \( (M, d) \) is called a discrete metric space.
\end{example}
\begin{example}
	Let \( G \) be a group generated by \( S \subset G \).
	We assume \( e \not\in S \) and \( x \in S \implies x^{-1} \in S \).
	Then
	\[
		d(x,y) = \min \qty{ n \geq 0 \colon \exists s_1, \dots, s_n \in S, y = x s_1 \dots s_n }
	\]
	defines a metric called the word metric.
\end{example}
\begin{example}
	Let \( p \) be prime.
	Then
	\[
		d(x,y) = \begin{cases}
			0      & \text{if } x = y                                                     \\
			p^{-n} & \text{otherwise, where } x - y = p^n m, n \geq 0, m \in \mathbb Z, p \nmid m
		\end{cases}
	\]
	defines a metric on \( \mathbb Z \).
	This is known as the \( p \)-adic metric.
\end{example}

\subsection{Subspaces}
Let \( (M, d) \) be a metric space, and \( N \subset M \).
Then naturally we can restrict \( d \) to \( N \times N \), giving a metric on \( N \).
\( (N, d) \) is called a subspace of \( M \).
\begin{example}
	Consider \( \mathbb Q \) with the metric \( d(x,y) = \abs{x-y} \).
	This is clearly a subspace of \( \mathbb R \) (implicitly with the standard metric on \( \mathbb R \)).
\end{example}
\begin{example}
	Since every continuous function on a closed bounded interval is bounded, \( C[a,b] \) is a subset of \( \ell_\infty[a,b] \).
	Hence \( C[a,b] \) with the uniform metric is a subspace of \( \ell_\infty[a,b] \).
\end{example}

\subsection{Product spaces}
Let \( (M, d), (M', d') \) be metric spaces.
Then any of the following defines a metric on the Cartesian product \( M \times M' \).
\begin{enumerate}
	\item \( d_1 ((x,x'), (y,y')) = d(x,y) + d(x',y') \)
	\item \( d_2 ((x,x'), (y,y')) = \qty(d(x,y)^2 + d(x',y')^2)^{\frac{1}{2}} \)
	\item \( d_\infty ((x,x'), (y,y')) = \max\qty{ d(x,y), d(x',y') } \)
\end{enumerate}
We commonly write \( (M \times M', d_p) \) as \( M \oplus_p M' \).
Note that we always have
\[
	d_\infty \leq d_2 \leq d_1 \leq 2 d_\infty
\]
We can generalise for \( n \in \mathbb N \) and metric spaces \( (M_k, d_k) \) for \( k \in \qty{1,\dots,n} \), by defining
\[
	\qty( \bigoplus_{k=1}^n M_k )_p = M_1 \oplus_p \dots \oplus_p M_n = \qty( M_1 \times \dots \times M_n, d_p )
\]
\begin{example}
	\( \mathbb R \oplus_1 \mathbb R = \ell_1^2 \).
	Further, \( \mathbb R \oplus_2 \mathbb R \oplus_2 \mathbb R = \ell_2^3 \), and other analogous results hold.
\end{example}
\begin{remark}
	\( \mathbb R \oplus_1 \mathbb R \oplus_2 \mathbb R \) does not make sense since we have not defined the associativity of the \( \oplus \) operator.
	The two choices yield different metric spaces.
\end{remark}

\subsection{Convergence}
Let \( M \) be a metric space, and \( (x_n) \) a sequence in \( M \).
Given \( x \in M \), we say that \( (x_n) \) converges to \( x \) in \( M \) if
\[
	\forall \varepsilon > 0, \exists N \in \mathbb N, \forall n \geq N, d(x_n,x)<\varepsilon
\]
We say that \( (x_n) \) is convergent in \( M \) if \( \exists x \in M \) such that \( x_n \to x \).
Otherwise, we say that \( (x_n) \) is divergent.
Note that \( x_n \to x \) in \( M \) if and only if \( d(x_n, x) \to 0 \) in \( \mathbb R \).
\begin{lemma}
	Suppose we have a sequence \( x_n \to x \) and \( x_n \to y \) in a metric space \( M \).
	Then \( x = y \).
\end{lemma}
\begin{proof}
	Suppose \( x \neq y \).
	Then let \( \varepsilon = \frac{d(x,y)}{3} > 0 \).
	So, by the definition of convergence,
	\[
		\exists N_1 \in \mathbb N, \forall n \geq N_1, d(x_n, x) < \varepsilon;
	\]
	\[
		\exists N_2 \in \mathbb N, \forall n \geq N_2, d(x_n, y) < \varepsilon
	\]
	Now, fix \( n \in \mathbb N \) such that \( n \geq N_1, n \geq N_2 \), for instance \( n = \max\qty{N_1, N_2} \).
	Then
	\[
		d(x,y) \leq d(x, x_n) + d(x_n, y) < 2\varepsilon = \frac{2}{3} d(x,y)
	\]
	which is a contradiction.
\end{proof}
\begin{definition}
	Given a convergent sequence \( (x_n) \) in a metric space \( M \), we say the \textit{limit} of \( (x_n) \) is the unique \( x \in M \) such that \( x_n \to x \) as \( n \to \infty \).
	This is denoted
	\[
		\lim_{n \to \infty} x_n
	\]
\end{definition}
\begin{example}
	This definition has the usual meaning when \( M = \mathbb R, \mathbb C \).
\end{example}
\begin{example}
	The constant sequence defined by \( x_n = x \) converges to \( x \).
	In particular, `eventually constant' sequences converge; let \( (x_n) \) be a sequence in \( M \) such that \( \exists x \in M, \exists N \in \mathbb N, \forall n \geq N, x_n = x \), then \( x_n \to x \).
	It is not necessarily true that sequences only converge if they are eventually constant.
	However, in a discrete metric space, the converse is true, since we can choose \( \varepsilon \) smaller than all nonzero distances (for instance \( \varepsilon = 1 \)).
\end{example}
\begin{example}
	Consider the \( 3 \)-adic metric.
	Then, \( 3^n \to 0 \) as \( n \to \infty \) since \( d(3^n, 0) = 3^{-n} \to 0 \).
\end{example}
\begin{example}
	Let \( S \) be a set.
	Then, \( f_n \to f \) in \( \ell_\infty(S) \) in the uniform metric if and only if \( d(f_n, f) = \norm{f_n - f}_\infty = \sup_S \abs{f_n - f} \to 0 \), which is precisely the condition that \( f_n \to f \) uniformly on \( S \).
	Note, however, that if \( f_n(x) = x + \frac{1}{n} \) for \( x \in \mathbb R, n \in \mathbb N \) and \( f(x) = x \), then certainly \( f_n \to f \) uniformly on \( \mathbb R \).
	However, \( f_n, f \not\in \ell_\infty(\mathbb R) \), so the uniform metric is not defined on these functions.
	So the notion of uniform convergence visited before is slightly more general than the idea of convergence in this metric space.
\end{example}
\begin{example}
	Consider Euclidean space \( M = \mathbb R^n, \mathbb C^n \) with the \( \ell_2 \) metric.
	Then, consider
	\[
		x^{(k)} = \qty(x^{(k)}_1, \dots, x^{(k)}_n) \in M
	\]
	for \( k \in \mathbb N \), and \( x = (x_1, \dots, x_n) \in M \).
	Then,
	\[
		\abs{x^{(k)}_i - x_i} \leq \norm{x^{(k)} - x}_2 \leq \sum_{j=1}^n \abs{x^{(k)}_j - x_j}
	\]
	So \( x^{(k)} \to x \) if and only if all \( i \) satisfy \( x^{(k)}_i \to x_i \).
	This can be thought of as convergence being equivalent to coordinate-wise (or pointwise) convergence.
\end{example}
\begin{example}
	Consider \( f_n(x) = x^n \) for \( x \in [0,1] \), and \( n \in \mathbb N \).
	Then \( (f_n) \) is a sequence in \( C[0,1] \), which converges pointwise but not uniformly.
	So \( (f_n) \) is not convergent in the uniform metric.
	However, using the \( L_1 \) metric, we have
	\[
		d_1(f_n, 0) = \norm{f_n}_1 = \int_0^1 f_n = \frac{1}{n+1} \to 0
	\]
	So, \( f_n \to 0 \) in \( (C[0,1], L_1) \).
\end{example}
\begin{example}
	Let \( N \) be a subspace of a metric space \( M \), and \( (x_n) \) be a convergent sequence in \( N \).
	Then \( (x_n) \) converges in \( M \).
	The converse is not necessarily true; consider \( M = \mathbb R \) and \( N = (0, \infty) \) with \( x_n = \frac{1}{n} \).
	This is divergent in \( N \) but convergent in \( M \).
\end{example}
\begin{example}
	Let \( (M, d), (M', d') \) be metric spaces.
	Let \( N = M \oplus_p M' \).
	Let \( a_n = (x_n, y_n) \in N \) for all \( n \in \mathbb N \), and \( a = (x, y) \in N \).
	Then
	\[
		a_n \to a \text{ in } N \iff x_n \to x \text{ in } M, y_n \to y \text{ in } M'
	\]
	Indeed,
	\[
		\max\qty{ d(x_n, x), d'(y_n, y) } = d_\infty(a_n, a) \leq d_p(a_n, a) \leq 2 d_1(a_n, a) = 2d(x_n, x) + 2d'(y_n, y)
	\]
\end{example}

\subsection{Continuity}
\begin{definition}
	Let \( f \colon M \to M' \) be a function between metric spaces \( (M, d), (M', d') \).
	Then for \( a \in M \), we say \( f \) is continuous at \( a \) if
	\[
		\forall \varepsilon > 0, \exists \delta > 0, \forall x \in M, d(x,a) < \delta \implies d'(f(x), f(a)) < \varepsilon
	\]
	We say \( f \) is continuous if \( f \) is continuous at \( a \) for all \( a \in M \).
	In other words,
	\[
		\forall a \in M, \forall \varepsilon > 0, \exists \delta > 0, \forall x \in M, d(x,a) < \delta \implies d'(f(x), f(a)) < \varepsilon
	\]
	Note that \( \delta \) depends both on \( \varepsilon \) and \( a \).
\end{definition}
\begin{proposition}
	Let \( f \colon M \to M' \) be as above.
	Let \( a \in M \).
	Then the following are equivalent:
	\begin{enumerate}
		\item \( f \) is continuous at \( a \);
		\item \( x_n \to a \) in \( M \) implies \( f(x_n) \to f(a) \) in \( M' \)
	\end{enumerate}
\end{proposition}
\begin{proof}
	First we show (i) implies (ii).
	Suppose \( x_n \to a \) in \( M \).
	Then fix \( \varepsilon > 0 \), and seek \( N \in \mathbb N \) such that \( \forall n \geq N, d'(f(x_n), f(a)) < \varepsilon \).
	By continuity, there exists \( \delta > 0 \) such that \( \forall x \in M, d(x,a) < \delta \implies d'(f(x), f(a)) < \varepsilon \).
	So it suffices to find \( N \) such that \( \forall n \geq N, d(x_n,a) < \delta \), which must exist since \( x_n \to a \).

	Now, we show (ii) implies (i).
	Suppose that \( f \) is not continuous at \( a \).
	Then,
	\[
		\exists \varepsilon > 0, \forall \delta > 0, \exists x \in M, d(x,a) < \delta, d'(f(x), f(a)) \geq \varepsilon
	\]
	So fix such an \( \varepsilon \) for which no suitable \( \delta \) exists.
	For each \( n \in \mathbb N \), apply this with \( \delta = \frac{1}{n} \) to obtain \( x_n \in M \) such that
	\[
		d(x_n,a) < \frac{1}{n};\quad d'(f(x_n), f(a)) \geq \varepsilon
	\]
	Then \( x_n \to a \) in \( M \) but \( f(x_n) \nrightarrow f(a) \) in \( M' \), which is a contradiction.
\end{proof}
\begin{proposition}
	Let \( f,g \) be scalar functions on a metric space \( M \).
	Let \( a \in M \).
	Then if \( f,g \) are continuous at \( a \), so are \( f+g \) and \( f \cdot g \).
	Moreover, letting \( N = \qty{x \in M \colon g(x) \neq 0} \) and assuming \( a \in N \), \( \frac{f}{g} \) is continuous at \( a \).
	Hence if \( f,g \) are continuous, then so are \( f+g, f \cdot g, \frac{f}{g} \) where they are defined.
\end{proposition}
\begin{proof}
	Suppose \( x_n \to a \).
	Then by the above proposition, \( (f\cdot g)(x_n) = f(x_n) \cdot g(x_n) \to f(a) \cdot g(a) = (f \cdot g)(a) \), and similar results hold for the other operators.
\end{proof}
\begin{remark}
	If \( f \colon M \to M' \) is continuous everywhere, then for every convergent sequence \( (x_n) \) in \( M \),
	\[
		\lim_{n \to \infty} f(x_n) = f\qty(\lim_{n \to \infty} x_n)
	\]
	by the second proposition.
\end{remark}
\begin{proposition}
	Let \( f \colon M \to M', g \colon M' \to M'' \) be functions between metric spaces.
	If \( f \) is continuous at \( a \) and \( g \) is continuous at \( f(a) \), then \( g \circ f \) is continuous at \( a \).
	If \( f,g \) are continuous, \( g \circ f \) is continuous.
\end{proposition}
\begin{proof}
	Let \( \varepsilon > 0 \).
	We want to find \( \delta > 0 \) such that \( \forall x \in M \),
	\[ d(x,a) < \delta \implies d''(g(f(x)), g(f(a))) < \varepsilon \]
	Since \( g \) is continuous at \( f(a) \), there exists \( \eta > 0 \) such that \( \forall y \in M' \),
	\[ d'(y,f(a)) < \eta \implies d''(g(y), g(f(a))) < \varepsilon \]
	Now, since \( f \) is continuous at \( a \), for this \( \eta \) there exists \( \delta \) such that for all \( x \in M \),
	\[ d(x,a) < \delta \implies d'(f(x), f(a)) < \eta \]
	Then
	\[ d(x,a) < \delta \implies d''(g(f(x)), g(f(a))) < \varepsilon \]
	as required.
\end{proof}

\begin{example}
	Constant functions are continuous.
	For instance, let \( b \in M' \) and let \( f \colon M \to M' \) be given by \( f(x) = b \).
	Then this is continuous since \( d'(f(x), f(a)) = d'(b,b) = 0 \) so any \( \delta > 0 \) will satisfy the condition.
\end{example}
\begin{example}
	The identity function \( f \colon M \to M \) defined by \( x \mapsto x \) is continuous.
	Consider \( d(f(x), f(a)) = d(x,a) \).
	So \( \delta = \varepsilon \) will suffice.
\end{example}
\begin{example}
	All real and complex polynomials and rational functions are continuous wherever they are defined by the propositions and examples above.
	In fact, using uniform convergence, the uniform limits of such functions are also continuous.
	For example, exponential and trigonometric functions are continuous.
\end{example}
\begin{example}
	Let \( (M, d) \) be a metric space.
	Then \( d \colon M \times M \to \mathbb R \) can be viewed as a function between the metric spaces \( M \oplus_p M \) and \( \mathbb R \).
	Then, given \( v = (x,x'), w = (y,y') \in M \oplus_p M \),
	\[
		\abs{d(v) - d(w)} = \abs{d(x,x') - d(y,y')} \leq d(x,y) + d(x',y') = d_1(v,w) \leq 2 d_p(v,w)
	\]
	Hence \( \delta = \frac{\varepsilon}{2} \) will suffice.
\end{example}

\subsection{Isometric, Lipschitz, and uniformly continuous functions}
\begin{definition}
	Let \( f \colon M \to M' \) be a function between metric spaces.
	Then, \( f \) is
	\begin{enumerate}
		\item \textit{isometric}, if \[ \forall x,y \in M, d'(f(x),f(y)) = d(x,y) \]
		\item \textit{Lipschitz}, or \( c \)-Lipschitz, if \[ \exists c \in \mathbb R^+, \forall x,y \in M, d'(f(x),f(y)) \leq c\cdot d(x,y) \]
		\item \textit{uniformly continuous}, if \[ \forall \varepsilon > 0, \exists \delta > 0, \forall x,y \in M, d(x,y) < \delta \implies d'(f(x), f(y)) < \varepsilon \]
	\end{enumerate}
\end{definition}
\begin{remark}
	Any isometric function is 1-Lipschitz.
	Any Lipschitz function is uniformly continuous.
	Any uniformly continuous function is continuous.
\end{remark}
\begin{remark}
	If a function is isometric, it is injective, since \( f(x) = f(y) \implies d(x,y) = d'(f(x), f(y)) = 0 \implies x = y \).
	For example, if \( N \subset M \), the inclusion map \( i \colon N \to M \) defined by \( i(x) = x \) is isometric, but not surjective unless \( N = M \).
	An isometric and surjective map is called an \textit{isometry}.
	If there exists an isometry \( M \to M' \), we say that \( M \) and \( M' \) are isometric metric spaces, or \( M' \) is an isometric copy of \( M \).
\end{remark}
\begin{example}
	Let \( (M, d), (M', d') \) be metric spaces.
	Let \( y \in M' \).
	We define \( f \colon M \to M \oplus_p M' \) by \( x \mapsto (x,y) \).
	Then \( d_p(f(x),f(z)) = d_p((x,y), (z,y)) = d(x,z) \).
	So the function \( f \) is isometric.
	Therefore, \( M \times \qty{ y } \) is an isometric copy of \( M \) in \( M \oplus_p M' \).
\end{example}
\begin{example}
	Consider the projections \( q \colon M \oplus_p M' \to M \) defined by \( q(x,y) = x \) and \( q' \colon M \oplus_p M' \to M' \) defined by \( q'(x,y) = y \).
	These projections are both 1-Lipschitz.
	Indeed,
	\[
		d(q(x,y), q(x',y')) = d(x,x') \leq d_p((x,y), (x',y'))
	\]
	In particular, polynomials in any finite number of variables are continuous since we can multiply continuous functions together.
\end{example}

\subsection{Generalised triangle inequality}
Suppose \( u,x,y,z \in M \).
Then, \( \abs{d(u,x) - d(y,z)} \leq d(u,y) + d(x,z) \).
First,
\[
	d(u,x) \leq d(u,y) + d(y,x) \leq d(u,y) + d(y,z) + d(z,x)
\]
Rearranging,
\[
	d(u,x)-d(y,z) \leq d(u,y) + d(x,z)
\]
For the reverse inequality, which together with the above bounds the absolute value, note that
\[
	d(y,z) \leq d(y,u) + d(u,x) + d(x,z)
\]
which gives
\[
	d(y,z) - d(u,x) \leq d(u,y) + d(x,z)
\]
as required.
